library(tidyverse)
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.2.1     ✓ purrr   0.3.3
✓ tibble  2.1.3     ✓ dplyr   0.8.4
✓ tidyr   1.0.2     ✓ stringr 1.4.0
✓ readr   1.3.1     ✓ forcats 0.5.0
── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
Purrrrrrrrrrrrrrrrrrrrrrr
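map() is the main function in purrr.
It iterates over a data structure and runs a function that you provide on each “element” of that structure, so it replaces most ‘for’ loops.
The map_*() variants (e.g. map_dbl(), map_int(), map_df()) work the same way but return a typed vector or data frame instead of a list.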
# Named list: each colour maps to a character vector of associated feelings.
colour_feelings <- list(
  blue   = c("Sad", "Calm"),
  red    = c("Angry", "Energetic", "Warm"),
  green  = c("Calm", "Nature"),
  yellow = c("Happy", "Warm", "Sunny")
)
A list in which each element is a character vector.
colour_feelings$blue
[1] "Sad" "Calm"
# Loop version: print the number of feelings stored for each colour.
for (item in colour_feelings) {
  print(length(item))
}
[1] 2
[1] 3
[1] 2
[1] 3
# purrr equivalent of the loop: apply length() to every element, get a list back.
colour_feelings %>%
  map(length)
$blue
[1] 2
$red
[1] 3
$green
[1] 2
$yellow
[1] 3
# Collapse each colour's feelings into one comma-separated string.
map(colour_feelings, ~ paste(.x, collapse = ", "))
$blue
[1] "Sad, Calm"
$red
[1] "Angry, Energetic, Warm"
$green
[1] "Calm, Nature"
$yellow
[1] "Happy, Warm, Sunny"
# Named list: each English colour name maps to its translated word.
colour_translator <- list(
  blue   = "gorm",
  red    = "dearg",
  green  = "uaine",
  yellow = "buidhe"
)
We want to paste “Translation:” in front of each of these words
# Prefix a word (or each word of a character vector) with "Translation:".
#
# text: character vector of words to label.
# Returns a character vector the same length as `text`.
add_translation <- function(text) {
  # paste() already separates its arguments with a single space, so the
  # prefix must not carry its own trailing space (that produced two spaces).
  paste("Translation:", text)
}
add_translation("gorm")
[1] "Translation: gorm"
# Apply the named helper to every translated word.
colour_translator %>%
  map(add_translation)
$blue
[1] "Translation: gorm"
$red
[1] "Translation: dearg"
$green
[1] "Translation: uaine"
$yellow
[1] "Translation: buidhe"
purrr lets you define little “bespoke”, custom-fitted functions inline to do wrangling.
# Inline (formula) version of the labelling step; `.x` is the current element.
# The label is spelled "Translation:" and has no trailing space, because
# paste() inserts the separating space itself.
map(.x = colour_translator, .f = ~ paste("Translation:", .x))
$blue
[1] "Transalation: gorm"
$red
[1] "Transalation: dearg"
$green
[1] "Transalation: uaine"
$yellow
[1] "Transalation: buidhe"
Data frames in R are just lists in which each element is a vector (all of the same length) and the name of each element is the column heading.
library(CodeClanData)
Attaching package: ‘CodeClanData’
The following object is masked from ‘package:dplyr’:
starwars
The following object is masked from ‘package:tidyr’:
population
The following object is masked from ‘package:datasets’:
volcano
# Named list: one numeric wavelength value per colour.
colour_wavelengths <- list(
  blue   = 470,
  red    = 665,
  green  = 550,
  yellow = 600
)
# Number of characters in each translated word.
map(.x = colour_translator, .f = nchar)
$blue
[1] 4
$red
[1] 5
$green
[1] 5
$yellow
[1] 6
# Round each wavelength to the nearest hundred (negative digits round left of
# the decimal point).
map(colour_wavelengths, ~ round(.x, digits = -2))
$blue
[1] 500
$red
[1] 700
$green
[1] 600
$yellow
[1] 600
# Divide every wavelength by 10^9.
# NOTE(review): presumably a nanometre-to-metre conversion -- confirm units.
map(colour_wavelengths, ~ .x / 1e9)
$blue
[1] 4.7e-07
$red
[1] 6.65e-07
$green
[1] 5.5e-07
$yellow
[1] 6e-07
#colour_feelings
# Same lengths as before, returned as a named double vector instead of a list.
colour_feelings %>%
  map_dbl(length)
blue red green yellow
2 3 2 3
# Same lengths, returned as a named integer vector.
colour_feelings %>%
  map_int(length)
blue red green yellow
2 3 2 3
# Remove one level of nesting: all feelings in a single character vector.
colour_feelings %>%
  flatten_chr()
[1] "Sad" "Calm" "Angry" "Energetic" "Warm" "Calm"
[7] "Nature" "Happy" "Warm" "Sunny"
# Convert the students data to a plain list (one element per column) and
# print that list. Previously the converted object was created but never
# used; the untouched `students` object was printed instead.
studentslist <- as.list(students)
studentslist
$age_years
[1] 17 14 14 17 16 17 13 17 16 13 18 17 17 17
$reaction_time
[1] 0.4420 0.3940 0.5510 0.3550 0.3810 0.6010 0.5940 0.3940 0.4320 0.2990 0.0774
[12] 1.0000 0.4810 0.4500
$text_messages_sent_yesterday
[1] 45 28 5 35 15 100 31 0 200 0 100 200 35 20
$favorite_school_subject
[1] "English" "Mathematics and statistics"
[3] "Mathematics and statistics" "Music"
[5] "Mathematics and statistics" "Physical education"
[7] "Physical education" "Other"
[9] "Art" "Music"
[11] "Computers and technology" "English"
[13] "Mathematics and statistics" "Science"
$school_year
[1] "Year 11" "Year 9" "Year 8" "Year 12" "Year 12" "Year 12" "Year 8" "Year 12"
[9] "Year 12" "Year 7" "Year 12" "Year 12" "Year 12" "Year 11"
$height_cm
[1] 185.0 160.0 152.4 155.0 176.0 177.8 166.0 164.0 162.0 154.0 183.0 157.0 175.3
[14] 173.0
$sleep_hours_schoolnight
[1] 8.0 7.0 6.0 8.0 5.5 5.0 10.0 7.0 6.0 8.0 8.0 6.0 6.5 6.0
$superpower
[1] "Telepathy" "Invisibility" "Telepathy" "Fly" "Freeze time"
[6] "Invisibility" "Fly" "Telepathy" "Telepathy" "Invisibility"
[11] "Fly" "Fly" "Telepathy" "Telepathy"
$languages_spoken
[1] 1.0 2.0 1.0 1.0 2.0 2.0 2.0 1.0 3.0 2.0 2.0 1.0 1.5 2.0
$planned_education_level
[1] "Graduate degree" "Graduate degree" "Graduate degree"
[4] "High school" "Less than high school" "Graduate degree"
[7] "Graduate degree" "Graduate degree" "Graduate degree"
[10] "Graduate degree" "Graduate degree" "Graduate degree"
[13] "Graduate degree" "Graduate degree"
# map_df() applies sort() to every column and binds the results back into a
# data frame. Each column is sorted on its own, so values in one row no longer
# belong to the same student. The result is therefore stored under a new name
# rather than overwriting `students`, which later plots still need with its
# rows intact.
students_sorted <- map_df(students, sort)
conditional mapping:
# One colour described by mixed-type fields (character and numeric).
blue <- list(
  translation = "gorm",
  feelings    = c("Sad", "Calm"),
  primary     = "Yes",
  wavelength  = 470
)
# .p is a predicate (a function returning TRUE/FALSE); .f is applied only to
# the elements for which the predicate is TRUE, the rest pass through as-is.
blue %>%
  map_if(.p = is.character, .f = nchar)
$translation
[1] 4
$feelings
[1] 3 4
$primary
[1] 3
$wavelength
[1] 470
# Collapse only those elements of `blue` that hold more than one value.
map_if(
  .x = blue,
  .p = ~ length(.x) > 1,
  .f = ~ paste(.x, collapse = ", ")
)
$translation
[1] "gorm"
$feelings
[1] "Sad, Calm"
$primary
[1] "Yes"
$wavelength
[1] 470
# Show the nested structure of colour_list.
# NOTE(review): colour_list is not created in the visible part of this file;
# presumably it is provided by CodeClanData -- confirm.
str(colour_list)
List of 4
$ blue :List of 4
..$ name : chr "Blue"
..$ feelings : chr [1:2] "Sad" "Calm"
..$ primary : chr "Yes"
..$ wavelength: num 470
$ red :List of 4
..$ name : chr "Red"
..$ feelings : chr [1:3] "Angry" "Energetic" "Warm"
..$ primary : chr "Yes"
..$ wavelength: num 665
$ green :List of 4
..$ name : chr "Green"
..$ feelings : chr [1:2] "Calm" "Nature"
..$ primary : chr "No"
..$ wavelength: num 550
$ yellow:List of 4
..$ name : chr "Yellow"
..$ feelings : chr [1:3] "Happy" "Warm" "Sunny"
..$ primary : chr "Yes"
..$ wavelength: num 600
# A string passed as .f extracts the element of that name from each sub-list.
map(.x = colour_list, .f = "wavelength")
$blue
[1] 470
$red
[1] 665
$green
[1] 550
$yellow
[1] 600
# Extract the "feelings" element from each colour's sub-list.
map(.x = colour_list, .f = "feelings")
$blue
[1] "Sad" "Calm"
$red
[1] "Angry" "Energetic" "Warm"
$green
[1] "Calm" "Nature"
$yellow
[1] "Happy" "Warm" "Sunny"
# A number passed as .f extracts by position: the 4th element of each sub-list.
map(.x = colour_list, .f = 4)
$blue
[1] 470
$red
[1] 665
$green
[1] 550
$yellow
[1] 600
# Two-step mapping: first pull out each colour's feelings, then count them.
colour_list_feelings <- colour_list %>%
  map("feelings")
map(.x = colour_list_feelings, .f = length)
$blue
[1] 2
$red
[1] 3
$green
[1] 2
$yellow
[1] 3
API = application programming interface. APIs typically return JSON (JavaScript Object Notation); R converts JSON into a nested list.
Colours and themes
http://sape.inf.usi.ch/quick-reference/ggplot2/colour
Also look up hexadecimal colours.
# Histogram of reaction times with a fixed, named fill colour.
ggplot(students, aes(x = reaction_time)) +
  geom_histogram(fill = "magenta")

# Same histogram; the fill colour is built with hcl() from hue, chroma and
# luminance values.
ggplot(students, aes(x = reaction_time)) +
  geom_histogram(fill = hcl(h = 200, c = 50, l = 50))

# Scatter plot with every point drawn in one fixed colour.
ggplot(students, aes(x = reaction_time, y = height_cm)) +
  geom_point(colour = "violetred1")

# Colour mapped to the `sex` column instead of being fixed.
ggplot(pets) +
  aes(x = weight, y = age, colour = sex) +
  geom_point()

# Colour mapped to `sleep` and shaded along a black-to-white gradient.
ggplot(pets) +
  aes(x = weight, y = age, colour = sleep) +
  geom_point() +
  scale_colour_gradient(low = "grey0", high = "grey100")
If the mapped aesthetic is fill rather than colour, use the matching scale_fill_*() function instead of scale_colour_*().
# Two-colour gradient between two named colours.
ggplot(pets) +
  aes(x = weight, y = age, colour = sleep) +
  geom_point() +
  scale_colour_gradient(low = "seagreen2", high = "turquoise4")

# Diverging gradient: blue at the low end, red at the high end, white at the
# midpoint value of 15.
ggplot(pets) +
  aes(x = weight, y = age, colour = sleep) +
  geom_point() +
  scale_colour_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 15
  )
geom_raster() draws a heat map.
# Heat map of volcano height; the fill runs through the listed colours.
ggplot(volcano) +
  aes(x = x, y = y, fill = height) +
  geom_raster() +
  scale_fill_gradientn(colours = c("chartreuse1", "maroon1"))
NA
# Same heat map, filled with a 10-colour terrain palette from colorspace.
ggplot(volcano) +
  aes(x = x, y = y, fill = height) +
  geom_raster() +
  scale_fill_gradientn(colours = colorspace::terrain_hcl(n = 10))
https://colorbrewer2.org/#type=sequential&scheme=OrRd&n=3
# Same heat map, filled with the ColorBrewer "OrRd" palette.
ggplot(volcano) +
  aes(x = x, y = y, fill = height) +
  geom_raster() +
  scale_fill_distiller(palette = "OrRd")
# Month-by-year heat maps of max_temp, each with a different fill scale.

# Two-colour gradient.
ggplot(temp_df, aes(x = month, y = year, fill = max_temp)) +
  geom_raster() +
  scale_fill_gradient(low = "olivedrab2", high = "darkorchid2")

# Diverging gradient centred on max_temp = 10.
ggplot(temp_df, aes(x = month, y = year, fill = max_temp)) +
  geom_raster() +
  scale_fill_gradient2(
    low = "dodgerblue4",
    mid = "darkorange2",
    high = "deeppink4",
    midpoint = 10
  )

# 28-colour terrain palette from colorspace.
ggplot(temp_df, aes(x = month, y = year, fill = max_temp)) +
  geom_raster() +
  scale_fill_gradientn(colours = colorspace::terrain_hcl(n = 28))

# ColorBrewer "Dark2" palette.
ggplot(temp_df, aes(x = month, y = year, fill = max_temp)) +
  geom_raster() +
  scale_fill_distiller(palette = "Dark2")
# Bar charts of students per school year, stacked by superpower. Every plot
# is the same apart from the discrete fill scale applied.

# Default fill colours.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar()

# Hues restricted to the range 120-300.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_hue(h = c(120, 300))

# ColorBrewer "Set1" palette.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")

# Grey scale with default limits.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_grey()

# Grey scale limited to the range 0-0.5.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_grey(start = 0, end = 0.5)

# Manual colours, matched to superpowers by name.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_manual(
    values = c(
      "Fly" = "red",
      "Freeze time" = "blue",
      "Invisibility" = "green",
      "Telepathy" = "yellow"
    )
  )

# Manual colours given without names.
ggplot(students, aes(x = school_year, fill = superpower)) +
  geom_bar() +
  scale_fill_manual(values = c("red", "blue", "green", "yellow"))
# Bar chart filled with a Wes Anderson palette.
# wes_palette() comes from the wesanderson package, which was not attached at
# this point (the call failed with 'could not find function "wes_palette"'),
# so it is namespace-qualified here. Palette names are case-sensitive: the
# palette is "GrandBudapest1", not "Grandbudapest1".
ggplot(students) +
  aes(x = school_year, fill = superpower) +
  geom_bar() +
  scale_fill_manual(values = wesanderson::wes_palette("GrandBudapest1"))
# Calories per day over time, one line per food type, default colours.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line()
# Same line chart with the colour hues restricted to a sub-range.
# Hue is an angle on the colour wheel, so the range must lie within 0-360;
# the previous upper bound of 2000 was outside it.
# NOTE(review): 200 is assumed to be the intended value -- confirm.
ggplot(chinesemeal) +
  aes(x = Year, colour = FoodType, y = CaloriesPerDay) +
  geom_line() +
  scale_colour_hue(h = c(60, 200))
# Same line chart with three further discrete colour scales.

# Grey scale limited to the range 0-0.7.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_grey(start = 0, end = 0.7)

# ColorBrewer "Accent" palette.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_brewer(palette = "Accent")

# Manually chosen colours.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_manual(values = c("red", "blue", "green", "orange"))
Use guide_colourbar() for continuous data; guide_legend() (used below) is for discrete data.
# Legend keys arranged over three rows.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_hue(guide = guide_legend(nrow = 3))

# Legend entries in reverse order.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_hue(guide = guide_legend(reverse = TRUE))

# Reversed legend with custom key width and height.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  scale_colour_hue(
    guide = guide_legend(keywidth = 1, keyheight = 6, reverse = TRUE)
  )

# Built-in grey theme with a larger base font size.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  theme_grey(base_size = 20)

# Plot title added, with the `title` theme element restyled.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  labs(title = "Typical Diet of a Chinese Citizen") +
  theme(title = element_text(size = 20, colour = "red", face = "bold"))
?theme
# Individual theme elements restyled: x-axis title, major grid lines and the
# plot background.
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  labs(title = "Typical Diet of a Chinese Citizen") +
  theme(
    axis.title.x = element_text(size = 20, colour = "red", face = "bold"),
    panel.grid.major = element_line(
      colour = "black",
      linetype = "dotted",
      size = 0.3
    ),
    plot.background = element_rect(fill = "limegreen")
  )

# Same styling, with the legend text additionally removed via element_blank().
ggplot(chinesemeal, aes(x = Year, y = CaloriesPerDay, colour = FoodType)) +
  geom_line() +
  labs(title = "Typical Diet of a Chinese Citizen") +
  theme(
    axis.title.x = element_text(size = 20, colour = "red", face = "bold"),
    panel.grid.major = element_line(
      colour = "black",
      linetype = "dotted",
      size = 0.3
    ),
    plot.background = element_rect(fill = "limegreen"),
    legend.text = element_blank()
  )
# Install ggthemes only when it is missing, so re-running the notebook does
# not download and reinstall the package every time.
if (!requireNamespace("ggthemes", quietly = TRUE)) {
  install.packages("ggthemes")
}
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/ggthemes_4.2.0.tgz'
Content type 'application/x-gzip' length 420298 bytes (410 KB)
==================================================
downloaded 410 KB
The downloaded binary packages are in
/var/folders/tc/k70prjwj5rs5hjh7zmt604_00000gn/T//RtmpJhNoxO/downloaded_packages
library(ggthemes)
# Exports over time, one facet per sector with its own y-axis range, drawn
# with the ggthemes Excel theme and bold blue axis titles.
ggplot(scottish_exports, aes(x = year, y = exports, colour = sector)) +
  geom_line() +
  facet_wrap(~sector, scales = 'free_y') +
  theme_excel() +
  theme(
    axis.title.x = element_text(size = 20, colour = "blue", face = "bold"),
    axis.title.y = element_text(size = 20, colour = "blue", face = "bold")
  )
Change order: convert the column to a factor whose levels are listed in the desired order.
# Plot sales per branch with the bars in a hand-picked order.
# The re-levelled data is piped straight into ggplot(): previously the
# mutate() result was only printed and then discarded, so the plot below it
# ignored the requested order. `total_sales` itself is left unchanged.
# NOTE(review): levels must match the values in `branch` exactly (any value
# not listed becomes NA) -- confirm the spellings "leeds", "Leatherhed" and
# "Welyn Garden City" against the data.
total_sales %>%
  mutate(
    branch = factor(
      branch,
      levels = c(
        "London", "leeds", "Glasgow", "Leatherhed", "Edinburgh",
        "Manchester", "Welyn Garden City"
      )
    )
  ) %>%
  ggplot() +
  aes(x = branch, y = sales) +
  geom_col() +
  coord_flip()
# Reorder the branch factor by sales, then draw horizontal bars in that order.
total_sales <- mutate(total_sales, branch = fct_reorder(branch, sales))

ggplot(total_sales, aes(x = branch, y = sales)) +
  geom_col() +
  coord_flip()